setwd("/home/Data/Genotype/Medixcan")
source("~/Projects/SzCausal/BaseCode/helper.R")
## Loading required package: Matrix
## Loaded glmnet 4.1-6
## Warning: package 'limma' was built under R version 4.2.1
## Loading required package: ggplot2
newprot=readRDS("syn_LMER_ENSG.RDS")
genotype.data = list()
genotype.data[[1]]=read.table("/home/Data/Genotype/Medixcan/Output/en_Brain_Cortex_outputVals.txt", header=T, row.names=1)
genotype.data[[2]]=read.table("/home/Data/Genotype/Medixcan/Output/en_Brain_Frontal_Cortex_BA9_outputVals.txt", header=T, row.names=1)
genotype.data[[3]]=read.table("/home/Data/Genotype/Medixcan/Output/en_Brain_Cerebellum_outputVals.txt", header=T, row.names=1)
genotype.data[[4]]=read.table("/home/Data/Genotype/Output2/en_Breast_outputVals.txt", header=T, row.names=1)
genotype.data[[5]]=read.table("/home/Data/Genotype/Medixcan/Output/psychencode_hg38_outputVals.txt",header=T, row.names=1)
names(genotype.data) = c("Cortex", "CortexBA9", "Cerebellum", "Breast", "psychENCODE")
Remove first column and get common columns
for (i in 1:5) {
genotype.data[[i]] = genotype.data[[i]][,-1]
genotype.data[[i]]=t(genotype.data[[i]])
colnames(genotype.data[[i]])=sub("0_","S", colnames(genotype.data[[i]]))
genotype.data[[i]]=genotype.data[[i]][,commonColumns(newprot,genotype.data[[i]])]
#get rid of transcript ID
namesSub=sub("\\..*", "", rownames(genotype.data[[i]]))
#make sure they are unique
rownames(genotype.data[[i]])=namesSub
any(table(namesSub)>1)
genotype.data[[i]]=as.matrix(genotype.data[[i]])
}
newprot=newprot[,commonColumns(newprot,genotype.data[[1]])] # they are the same
newprot=tscale(newprot)
newprot=as.matrix(newprot)
# pheno
library(readxl)
pheno = read_xlsx("../SzCausal_202211/oneDriveData/metadata/All_Subject_Demographics.xlsx")
pheno = as.data.frame(pheno)
rownames(pheno) = paste0(c("S"), pheno$HU)
pheno = pheno[colnames(newprot),]
# plot PCs for the genotype data
pheno$Race[pheno$Race == "W83"] = "W"
set.seed(1);genotype.svd=rsvd(genotype.data[[1]], k=20)
plotPCAggplotAll(genotype.data[[1]], svdres=genotype.svd, grp = pheno$Race)
plotPCAggplotAll(newprot, grp = pheno$Race)
# compute the SVD
genotypeCorrected = genotype.data
p = list()
for (i in 1:5) {
set.seed(1);genotype.svd=rsvd(genotype.data[[i]], k=20)
genotypeCorrected[[i]] = genotype.data[[i]]-SVDreconstruct(genotype.svd, k=2)
p[[i]] = plotPCAggplotAll(genotypeCorrected[[i]], grp = pheno$Race, title = names(genotypeCorrected)[i])
}
ggarrange(plotlist = p, ncol = 1, nrow = 1)
source("genoFuncs.R")
newProtCor = list()
for(i in 1:5) {
newProtCor[[i]]=getMatchedCorrelation(newprot, genotypeCorrected[[i]]) #input: matrix
}
names(newProtCor) = names(genotype.data)
set.seed(101)
randomCor = vector("list", 5)
for (i in 1:5) {
for(j in 1:50){
randomCor[[i]] = c(randomCor[[i]],getMatchedCorrelation(newprot, genotypeCorrected[[i]],resample = T))
}
}
names(randomCor) = names(newProtCor)
cor_violin_data = data.frame()
for (i in 1:5) {
cor = data.frame("cor" = newProtCor[[i]],
"model" = names(newProtCor)[[i]],
"type" = c('Matched'), row.names = NULL)
rand = data.frame("cor" = randomCor[[i]],
"model" = names(newProtCor)[[i]],
"type" = c('Random'), row.names = NULL)
cor_violin_data = rbind(cor_violin_data, cor, rand)
}
p1 <- ggplot(cor_violin_data, aes(x=type, y=cor, color=model), height = 5, width = 10) +
geom_violin(trim = F, position=position_dodge()) + facet_grid(. ~ model)
p2 <- ggplot(cor_violin_data, aes(x=model, y=cor, color=model)) +
geom_violin(trim = F, position=position_dodge()) + facet_grid(. ~ type)
my_comparisons <- list( c("Breast", "Cerebellum"),
c("Breast", "Cortex"),
c("Breast", "CortexBA9"),
c("Breast", "psychENCODE"))
library(ggplot2)
library(EnvStats)
##
## Attaching package: 'EnvStats'
## The following object is masked from 'package:Matrix':
##
## print
## The following objects are masked from 'package:stats':
##
## predict, predict.lm
## The following object is masked from 'package:base':
##
## print.default
pp1 = p1 +
geom_boxplot(width=0.2, position = position_dodge(width =0.9)) +
stat_compare_means(comparisons = list(c('Matched', 'Random')), label= "p.signif") +
stat_n_text()
pp2 = p2 +
geom_boxplot(width=0.2, position = position_dodge(width =0.9)) +
stat_compare_means(comparisons = my_comparisons, label= "p.signif") +
stat_n_text()
pv1 = p1 +
geom_boxplot(width=0.2, position = position_dodge(width =0.9)) +
stat_compare_means(comparisons = list(c('Matched', 'Random')), label= "p.adj") +
stat_n_text()
pv2 = p2 +
geom_boxplot(width=0.2, position = position_dodge(width =0.9)) +
stat_compare_means(comparisons = my_comparisons, label= "p.adj") +
stat_n_text()
ggarrange(pp1, pp2, heights = c(3.5, 4.5), ncol = 1, nrow = 2)
ggarrange(pv1, pv2, heights = c(3.5, 4.5), ncol = 1, nrow = 2)